load data

USE_DC <- FALSE

text = read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/EMLLM/info/ia_label_mapping_opt_surprisal.csv')
if (USE_DC){eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_recomputed/'
} else {eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_nodc/'
}
file_pat = '_reading_N400_stats\\.csv$'
files=list.files(path = eeg_dir, pattern= file_pat)

Loop over participants

all<-c()
i<-0
for (f in files){
    i=i+1
    df <-read.csv(paste0(eeg_dir,f))
    p <- strsplit(f,'_')
    pID <- paste(p[[1]][1],p[[1]][2],sep='_')
    df$pID <- pID
    # df<- df %>% dplyr::select(c("pID", "type","latency","fixDur","task","identifier" , "page_fixation_ix","IA_ID","n400_magnitude_CPz","n400_latency_CPz"  ))
    df<- df %>% filter(task=='reading') %>% droplevels()
    all[[i]] <- df
}
df <- do.call(rbind, all)

# Merge with text info for surprisal values
df <- df %>% merge(text, on=c("IA_ID","identifier"))
df <- df %>% rename(surprisalText=opt.125m_surprisal_wholetext, surprisalPage = opt.125m_surprisal_page,fixDur=duration)
# drop rows with no IA
df <- drop_na(df, "IA_LABEL")
# drop rows with punc
df <- df %>% filter(! IA_LABEL %in% c('.',',','-','--',';',':',"'",'?','!'))

feats <- grep("n400|fixDur",colnames(df), value=T)
feats_eeg <- grep("n400",colnames(df), value=T)

transforms

# df<-df %>% mutate(across(contains('n400_magnitude'),
#              .fns=~log(-.x)), .names="log_neg_{.col}")
df<-df %>% mutate(logFixDur = log(fixDur),
                  logSurprisalText = log(surprisalText),
                  logSurprisalPage = log(surprisalPage))
# lagged surprisal
df <- df %>% group_by(pID, identifier) %>% arrange(page_fixation_ix, .by_group=T) %>% 
  mutate(prev_surprisalText = dplyr::lag(surprisalText, n=1, default=NA),
         prev_surprisalPage = dplyr::lag(surprisalPage, n=1, default=NA),
         prev_logSurprisalText = dplyr::lag(logSurprisalText, n=1, default=NA),
         prev_logSurprisalPage = dplyr::lag(logSurprisalPage, n=1, default=NA),
         )
# refixation
df <- df %>% group_by(pID, identifier, IA_ID) %>% mutate(
  IA_fix_count = n(),
  refixation=as.factor(ifelse(n()>1,1,0)))

plots

df_long <- df %>% pivot_longer(all_of(c(feats,'logFixDur')), names_to="feature",values_to="value")
df_long <- df_long %>% mutate(channel=str_match(feature, 'n400_\\w*_([[:alnum:]]{2,5})$')[,2],
                              feature_type=str_match(feature, '(n400_\\w*)_[[:alnum:]]{2,5}$')[,2])
# compute lower and upper whiskers for each group
ylims <- df_long %>%
  group_by(feature_type) %>%
  summarise(Q1 = quantile(value, 1/100), Q3 = quantile(value, 99/100)) %>%
  ungroup()



p1<-ggplot(df_long %>% filter(feature_type=='n400_mean')) + 
  geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_mean')
p2<-ggplot(df_long %>% filter(feature_type=='n400_min_magnitude')) + 
  geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_min')
p3<-ggplot(df_long %>% filter(feature_type=='n400_max_magnitude')) + 
  geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_max')
grid.arrange(p1,p2,p3)

ggplot(df)+
    geom_density(aes(x=fixDur))+
  scale_x_log10()

ggplot(df)+
    geom_density(aes(x=surprisalText))+
  scale_x_log10()

ggplot(df_long)+
    geom_violin(aes(x=value,y=factor(channel), fill=factor(channel),outliers = FALSE), alpha=0.3)+
  facet_wrap(~feature_type, scales="free") 

ggplot(df_long %>% filter(feature_type =='n400_mean'))+
    geom_point(aes(x=surprisalText,y=value, color=factor(channel)), alpha=0.05, size=0.5)+
  facet_wrap(~channel, scales="free") 

repeat some plots after transformatiions

ggplot(df,aes(x=logSurprisalText, y=logFixDur)) + 
  geom_point(size=1, alpha=0.01)+stat_cor(method="pearson")

ggplot(df_long %>% filter(feature_type =='n400_mean'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
    geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
  facet_wrap(~channel, scales="free") 

ggplot(df_long %>% filter(feature_type =='n400_max_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
    geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
  facet_wrap(~channel, scales="free") 

ggplot(df_long %>% filter(feature_type =='n400_min_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
    geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
  facet_wrap(~channel, scales="free") 

LME stats

predict EEG from LOG surprisal and fixDur

m.l.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
tab_model(m.l.m, m.l.min, m.l.max, m.l.zc, show.ci=F)
  n 400 mean C Pz n 400 max magnitude C Pz n 400 min magnitude C Pz n 400 zero crossings C Pz
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 17.14 <0.001 95.98 <0.001 -64.58 <0.001 1.93 <0.001
logFixDur -2.85 <0.001 -3.65 <0.001 -1.86 0.014 0.03 0.049
logSurprisalText -0.91 <0.001 -1.00 <0.001 -0.74 0.004 0.00 0.888
Random Effects
σ2 10397.76 10716.54 12209.36 4.91
τ00 98.97 pID 908.07 pID 702.36 pID 0.51 pID
26.39 identifier 29.42 identifier 31.20 identifier 0.02 identifier
ICC 0.01 0.08 0.06 0.10
N 50 identifier 50 identifier 50 identifier 50 identifier
91 pID 91 pID 91 pID 91 pID
Observations 104272 104272 104272 104272
Marginal R2 / Conditional R2 0.000 / 0.012 0.000 / 0.081 0.000 / 0.057 0.000 / 0.098
dotplot(ranef(m.l.m))
## $pID

## 
## $identifier

predict fixDur from surprisal

m.d <- lmer(fixDur ~ (1|identifier) + (1|pID) + logSurprisalText, data=df)
tab_model(m.d)
  fix Dur
Predictors Estimates CI p
(Intercept) 208.40 202.58 – 214.22 <0.001
logSurprisalText 1.81 1.38 – 2.24 <0.001
Random Effects
σ2 8839.40
τ00 pID 729.69
τ00 identifier 31.32
ICC 0.08
N identifier 50
N pID 91
Observations 104272
Marginal R2 / Conditional R2 0.001 / 0.080

predict fixation fixDur from EEG features

m.eye <- lmer(fixDur ~ (1|identifier) + (1|pID) + n400_mean_CPz, data=df)
tab_model(m.eye)
  fix Dur
Predictors Estimates CI p
(Intercept) 210.22 204.41 – 216.02 <0.001
n400 mean CPz -0.01 -0.01 – -0.00 0.003
Random Effects
σ2 8844.38
τ00 pID 729.05
τ00 identifier 32.34
ICC 0.08
N identifier 50
N pID 91
Observations 104272
Marginal R2 / Conditional R2 0.000 / 0.079

predict EEG and surprisal from previous IA surprisal

mp.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText + prev_logSurprisalText, data=df)
tab_model(mp.m)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) 16.72 9.04 – 24.40 <0.001
logFixDur -2.71 -4.08 – -1.33 <0.001
logSurprisalText -0.87 -1.34 – -0.40 <0.001
prev logSurprisalText -0.52 -0.99 – -0.06 0.028
Random Effects
σ2 10336.85
τ00 pID 100.21
τ00 identifier 26.79
ICC 0.01
N identifier 50
N pID 91
Observations 102838
Marginal R2 / Conditional R2 0.000 / 0.013
mp.s <- lm( logSurprisalText ~ prev_logSurprisalText, data=df)
tab_model(mp.s)
  log Surprisal Text
Predictors Estimates CI p
(Intercept) 0.89 0.88 – 0.90 <0.001
prev logSurprisalText 0.15 0.14 – 0.15 <0.001
Observations 102838
R2 / R2 adjusted 0.022 / 0.022

model including bool for refixation

mr.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + refixation*logSurprisalText, data=df)
tab_model(mr.m)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) 16.93 9.23 – 24.62 <0.001
logFixDur -2.86 -4.22 – -1.49 <0.001
refixation [1] 0.54 -1.08 – 2.15 0.515
logSurprisalText -0.49 -1.24 – 0.25 0.192
refixation [1] *
logSurprisalText
-0.68 -1.63 – 0.27 0.160
Random Effects
σ2 10397.75
τ00 pID 99.06
τ00 identifier 26.38
ICC 0.01
N identifier 50
N pID 91
Observations 104272
Marginal R2 / Conditional R2 0.000 / 0.012
Anova(mr.m)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: n400_mean_CPz
##                               Chisq Df Pr(>Chisq)    
## logFixDur                   16.8361  1 0.00004075 ***
## refixation                   0.0206  1  0.8859451    
## logSurprisalText            14.5204  1  0.0001387 ***
## refixation:logSurprisalText  1.9730  1  0.1601283    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_model(mr.m,type = "emm", terms=c("logSurprisalText", "refixation"))

tb<-tableone::CreateTableOne(data = df, vars = c("n400_mean_CPz", "logFixDur"), strata = 'refixation', test=T)
knitr::kable(print(tb))
##                            Stratified by refixation
##                             0              1              p      test
##   n                         36498          67774                     
##   n400_mean_CPz (mean (SD))  1.15 (105.74)  0.98 (100.75)  0.795     
##   logFixDur (mean (SD))      5.27 (0.44)    5.24 (0.49)   <0.001
0 1 p test
n 36498 67774
n400_mean_CPz (mean (SD)) 1.15 (105.74) 0.98 (100.75) 0.795
logFixDur (mean (SD)) 5.27 (0.44) 5.24 (0.49) <0.001

predict EEG from surprisal and fixDur with random slope of surprisal for participant

m.rs.m <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText + logFixDur, data=df)
m.rs.min <- lmer(n400_max_magnitude_CPz ~ (1+logSurprisalText|pID) +  (1|identifier) + logFixDur + logSurprisalText, data=df)
m.rs.max <- lmer(n400_min_magnitude_CPz ~(1+logSurprisalText|pID) +  (1|identifier) + logFixDur + logSurprisalText, data=df)
tab_model(m.rs.m, m.rs.min, m.rs.max, show.ci=F)
  n 400 mean C Pz n 400 max magnitude C Pz n 400 min magnitude C Pz
Predictors Estimates p Estimates p Estimates p
(Intercept) 17.12 <0.001 95.94 <0.001 -64.66 <0.001
logSurprisalText -0.93 0.003 -1.01 0.002 -0.74 0.019
logFixDur -2.84 <0.001 -3.64 <0.001 -1.84 0.015
Random Effects
σ2 10391.53 10709.01 12203.99
τ00 112.30 pID 907.02 pID 706.83 pID
26.66 identifier 29.54 identifier 31.31 identifier
τ11 3.30 pID.logSurprisalText 3.99 pID.logSurprisalText 2.88 pID.logSurprisalText
ρ01 -0.43 pID -0.04 pID -0.05 pID
ICC 0.01 0.08 0.06
N 91 pID 91 pID 91 pID
50 identifier 50 identifier 50 identifier
Observations 104272 104272 104272
Marginal R2 / Conditional R2 0.000 / 0.013 0.000 / 0.081 0.000 / 0.057
dotplot(ranef(m.rs.m))
## $pID

## 
## $identifier

Modeling with comprehension/MW

Load Behavioural data

df_comp <- read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EyeMindLink/Processed/Behaviour/EML1_page_level.csv')
df_comp <- df_comp %>% rename(pID=ParticipantID) %>% 
  mutate(Page=PageNum-1,
         identifier=paste0(Text, Page))
df <- merge(df, df_comp, on=c("pID","identifier"))
df_mw <- df %>% drop_na(MW) %>% mutate(MW=as.factor(MW))

model FIXATION LEVEL N400 ~ surprisal*MW (caveat - MW is at page level)

m.mw <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier)  + logSurprisalText*MW , data=df_mw)
Anova(m.mw)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: n400_mean_CPz
##                      Chisq Df Pr(>Chisq)
## logSurprisalText    0.5167  1     0.4723
## MW                  0.5501  1     0.4583
## logSurprisalText:MW 1.2574  1     0.2621
tab_model(m.mw)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) 3.71 -1.92 – 9.34 0.196
logSurprisalText 0.06 -1.29 – 1.40 0.934
MW [1] 2.99 -1.81 – 7.79 0.222
logSurprisalText * MW [1] -1.22 -3.36 – 0.91 0.262
Random Effects
σ2 9889.06
τ00 pID 333.44
τ00 identifier 55.66
τ11 pID.logSurprisalText 2.97
ρ01 pID -0.85
ICC 0.03
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.033
plot_model(m.mw,type = "emm", terms=c("logSurprisalText", "MW"))

plot_model(m.mw,type = "emm", terms=c("logFixDur", "MW"))
## `logFixDur` was not found in model terms. Maybe misspelled?
## Can't compute estimated marginal means, 'emmeans::emmeans()' returned an error.
## 
## Reason: No variable named logFixDur in the reference grid
## You may try 'ggpredict()' or 'ggeffect()'.
## NULL
emmeans(m.mw, pairwise ~ MW, p.adjust = "fdr")
## $emmeans
##  MW emmean   SE  df asymp.LCL asymp.UCL
##  0    3.77 2.68 Inf    -1.476      9.01
##  1    5.47 2.90 Inf    -0.223     11.16
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df z.ratio p.value
##  0 - 1        -1.7 2.08 Inf  -0.814  0.4155
## 
## Degrees-of-freedom method: asymptotic

bobby’s mode: MW label at page level, surprosal and n400 at fixation level

# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB <- lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n400_mean_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |  
##     pID) + (logSurprisalText | pID:identifier)
##    Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 268460.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8943 -0.5463  0.0153  0.5626  5.7741 
## 
## Random effects:
##  Groups         Name                 Variance Std.Dev. Corr             
##  pID:identifier (Intercept)           1654.44  40.675                   
##                 logSurprisalText        18.15   4.260  -1.00            
##  pID            (Intercept)            579.29  24.068                   
##                 MW1                  11698.49 108.160   0.02            
##                 logSurprisalText        20.62   4.541  -0.19 -0.86      
##                 MW1:logSurprisalText   389.91  19.746  -0.26 -0.95  0.76
##  Residual                             9416.73  97.040                   
## Number of obs: 22322, groups:  pID:identifier, 301; pID, 91
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)            4.7955     4.3513  64.0041   1.102    0.275
## MW1                    4.6060    15.1917 461.7926   0.303    0.762
## logSurprisalText       0.2108     0.9029  44.2160   0.234    0.816
## MW1:logSurprisalText  -1.8703     3.0574  18.9739  -0.612    0.548
## 
## Correlation of Fixed Effects:
##             (Intr) MW1    lgSrpT
## MW1         -0.209              
## lgSrprslTxt -0.455 -0.144       
## MW1:lgSrprT  0.046 -0.909 -0.021
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
tab_model(mB)
  n 400 mean C Pz
Predictors Estimates CI p
(Intercept) 4.80 -3.73 – 13.32 0.270
MW [1] 4.61 -25.17 – 34.38 0.762
logSurprisalText 0.21 -1.56 – 1.98 0.815
MW [1] * logSurprisalText -1.87 -7.86 – 4.12 0.541
Random Effects
σ2 9416.73
τ00 pID:identifier 1654.44
τ00 pID 579.29
τ11 pID:identifier.logSurprisalText 18.15
τ11 pID.MW1 11698.49
τ11 pID.logSurprisalText 20.62
τ11 pID.MW1:logSurprisalText 389.91
ρ01 pID:identifier -1.00
ρ01 pID.MW1 0.02
ρ01 pID.logSurprisalText -0.19
ρ01 pID.MW1:logSurprisalText -0.26
ICC 0.34
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.338

min n400

# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.min <- lmer(n400_min_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.min)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## n400_min_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |  
##     pID) + (logSurprisalText | pID:identifier)
##    Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 272324.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.8703 -0.5359  0.0344  0.5788  5.1761 
## 
## Random effects:
##  Groups         Name                 Variance   Std.Dev. Corr             
##  pID:identifier (Intercept)           1897.2779  43.558                   
##                 logSurprisalText        14.9662   3.869  -0.74            
##  pID            (Intercept)           1288.5762  35.897                   
##                 MW1                    104.7103  10.233  -0.55            
##                 logSurprisalText         0.5579   0.747  -0.64  0.84      
##                 MW1:logSurprisalText 35036.4903 187.180   0.38 -0.49 -0.44
##  Residual                            11069.8068 105.213                   
## Number of obs: 22322, groups:  pID:identifier, 301; pID, 91
## 
## Fixed effects:
##                      Estimate Std. Error       df t value            Pr(>|t|)
## (Intercept)           -75.105      5.380   98.438 -13.959 <0.0000000000000002
## MW1                     8.644      6.838  125.248   1.264               0.209
## logSurprisalText        0.130      0.782  138.419   0.166               0.868
## MW1:logSurprisalText    2.405     24.482 4951.092   0.098               0.922
##                         
## (Intercept)          ***
## MW1                     
## logSurprisalText        
## MW1:logSurprisalText    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) MW1    lgSrpT
## MW1         -0.513              
## lgSrprslTxt -0.354  0.253       
## MW1:lgSrprT  0.196 -0.050 -0.063
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.min)
  n 400 min magnitude C Pz
Predictors Estimates CI p
(Intercept) -75.10 -85.65 – -64.56 <0.001
MW [1] 8.64 -4.76 – 22.05 0.206
logSurprisalText 0.13 -1.40 – 1.66 0.868
MW [1] * logSurprisalText 2.41 -45.58 – 50.39 0.922
Random Effects
σ2 11069.81
τ00 pID:identifier 1897.28
τ00 pID 1288.58
τ11 pID:identifier.logSurprisalText 14.97
τ11 pID.MW1 104.71
τ11 pID.logSurprisalText 0.56
τ11 pID.MW1:logSurprisalText 35036.49
ρ01 pID:identifier -0.74
ρ01 pID.MW1 -0.55
ρ01 pID.logSurprisalText -0.64
ρ01 pID.MW1:logSurprisalText 0.38
ICC 0.79
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.001 / 0.790

max n400

# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.max <- lmer(n400_max_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.max)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## n400_max_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |  
##     pID) + (logSurprisalText | pID:identifier)
##    Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 269299.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.6498 -0.5492  0.0005  0.5668  5.7653 
## 
## Random effects:
##  Groups         Name                 Variance Std.Dev. Corr             
##  pID:identifier (Intercept)          1720.701 41.481                    
##                 logSurprisalText        0.996  0.998   -1.00            
##  pID            (Intercept)          1540.422 39.248                    
##                 MW1                   325.368 18.038   -1.00            
##                 logSurprisalText       11.519  3.394    0.36 -0.38      
##                 MW1:logSurprisalText  694.098 26.346    0.40 -0.39 -0.21
##  Residual                            9745.094 98.717                    
## Number of obs: 22322, groups:  pID:identifier, 301; pID, 91
## 
## Fixed effects:
##                      Estimate Std. Error       df t value            Pr(>|t|)
## (Intercept)           79.8938     5.5434  49.4935  14.412 <0.0000000000000002
## MW1                    4.2239     6.4477 153.9681   0.655               0.513
## logSurprisalText       0.3479     0.7790  67.8464   0.447               0.657
## MW1:logSurprisalText  -0.5603     3.6415 365.7632  -0.154               0.878
##                         
## (Intercept)          ***
## MW1                     
## logSurprisalText        
## MW1:logSurprisalText    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) MW1    lgSrpT
## MW1         -0.641              
## lgSrprslTxt -0.031  0.069       
## MW1:lgSrprT  0.228 -0.119 -0.226
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.max)
  n 400 max magnitude C Pz
Predictors Estimates CI p
(Intercept) 79.89 69.03 – 90.76 <0.001
MW [1] 4.22 -8.41 – 16.86 0.512
logSurprisalText 0.35 -1.18 – 1.87 0.655
MW [1] * logSurprisalText -0.56 -7.70 – 6.58 0.878
Random Effects
σ2 9745.09
τ00 pID:identifier 1720.70
τ00 pID 1540.42
τ11 pID:identifier.logSurprisalText 1.00
τ11 pID.MW1 325.37
τ11 pID.logSurprisalText 11.52
τ11 pID.MW1:logSurprisalText 694.10
ρ01 pID:identifier -1.00
ρ01 pID.MW1 -1.00
ρ01 pID.logSurprisalText 0.36
ρ01 pID.MW1:logSurprisalText 0.40
ICC 0.28
N pID 91
N identifier 20
Observations 22322
Marginal R2 / Conditional R2 0.000 / 0.279

Eye-mind-linkage metric at page level

note atanh transform for correlation when it is to be used in model

eml_page <- df %>% group_by(pID, identifier) %>% summarise(
    count=n(),
    cor_n400meanCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_mean_CPz)),
    cor_n400minCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_min_magnitude_CPz)),
    cor_n400maxCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_max_magnitude_CPz)),
    cor_logFixDur_logSurprisalText = atanh(cor(logSurprisalText, logFixDur)),
    meanLogSurprisalText=mean(logSurprisalText)
) %>% drop_na() %>% 
  filter(count>2) # remove instances with single or two fixation on page as correlation coeff will be 1 or -1
eml_page[Reduce(`&`, lapply(eml_page, is.finite)),]
## # A tibble: 0 × 8
## # Groups:   pID [0]
## # … with 8 variables: pID <chr>, identifier <fct>, count <int>,
## #   cor_n400meanCPz_logSurprisalText <dbl>,
## #   cor_n400minCPz_logSurprisalText <dbl>,
## #   cor_n400maxCPz_logSurprisalText <dbl>,
## #   cor_logFixDur_logSurprisalText <dbl>, meanLogSurprisalText <dbl>
df_page <- left_join(eml_page, df_comp, on=c('pID', 'identifer')) 
df_page$MW = as.factor(df_page$MW)


# plot the new metrics
p1<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_n400meanCPz_logSurprisalText, color=MW))
p2<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_logFixDur_logSurprisalText, color=MW))
p3<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_n400minCPz_logSurprisalText, color=MW))
p4<-ggplot(df_page %>% drop_na(MW))+
  geom_density( aes(x=cor_n400maxCPz_logSurprisalText, color=MW))
grid.arrange(p1,p2,p3,p4)

# plot by participantID
ggplot(df_page, aes(x=cor_n400meanCPz_logSurprisalText, y=pID)) +
  geom_boxplot(  horizontal = TRUE,        outlier.colour="red",
        outlier.fill="red",
        outlier.size=0.2)

binomial regression: predict page level MW from page level correlations with surprisal

m.mw.mean <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + (1|identifier) + (1+cor_n400meanCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.mean,type = "emm", terms=c("cor_n400meanCPz_logSurprisalText"))

m.mw.min <- glmer(MW ~ cor_n400minCPz_logSurprisalText  + (1|identifier) + (1+cor_n400minCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.min,type = "emm", terms=c("cor_n400minCPz_logSurprisalText"))

m.mw.max <- glmer(MW ~ cor_n400maxCPz_logSurprisalText  + (1|identifier) + (1+cor_n400maxCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.max,type = "emm", terms=c("cor_n400maxCPz_logSurprisalText"))

m.mw.all <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + cor_n400minCPz_logSurprisalText  +cor_n400maxCPz_logSurprisalText  + (1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")

tab_model(m.mw.mean, m.mw.min, m.mw.max)
  MW MW MW
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.47 0.27 – 0.83 0.009 0.47 0.27 – 0.83 0.009 0.48 0.47 – 0.48 <0.001
cor n400meanCPz
logSurprisalText
0.31 0.08 – 1.26 0.101
cor n400minCPz
logSurprisalText
0.26 0.05 – 1.24 0.090
cor n400maxCPz
logSurprisalText
0.56 0.55 – 0.56 <0.001
Random Effects
σ2 3.29 3.29 3.29
τ00 1.28 pID 1.27 pID 1.25 pID
0.70 identifier 0.70 identifier 0.68 identifier
τ11 0.02 pID.cor_n400meanCPz_logSurprisalText 0.02 pID.cor_n400minCPz_logSurprisalText 0.26 pID.cor_n400maxCPz_logSurprisalText
ρ01 -1.00 pID -1.00 pID -1.00 pID
ICC 0.38 0.38 0.37
N 20 identifier 20 identifier 20 identifier
91 pID 91 pID 91 pID
Observations 287 287 287
Marginal R2 / Conditional R2 0.039 / 0.400 0.028 / 0.393 0.009 / 0.380
tab_model(m.mw.mean)
  MW
Predictors Odds Ratios CI p
(Intercept) 0.47 0.27 – 0.83 0.009
cor n400meanCPz
logSurprisalText
0.31 0.08 – 1.26 0.101
Random Effects
σ2 3.29
τ00 pID 1.28
τ00 identifier 0.70
τ11 pID.cor_n400meanCPz_logSurprisalText 0.02
ρ01 pID -1.00
ICC 0.38
N identifier 20
N pID 91
Observations 287
Marginal R2 / Conditional R2 0.039 / 0.400
tab_model(m.mw.all)
  MW
Predictors Odds Ratios CI p
(Intercept) 0.47 0.27 – 0.83 0.009
cor n400meanCPz
logSurprisalText
0.19 0.00 – 24.74 0.503
cor n400minCPz
logSurprisalText
0.94 0.01 – 76.33 0.978
cor n400maxCPz
logSurprisalText
1.78 0.20 – 15.68 0.606
Random Effects
σ2 3.29
τ00 pID 1.31
τ00 identifier 0.71
ICC 0.38
N identifier 20
N pID 91
Observations 287
Marginal R2 / Conditional R2 0.046 / 0.410
emmeans(m.mw.mean, pairwise ~ cor_n400meanCPz_logSurprisalText, p.adjust = "fdr")
## $emmeans
##  cor_n400meanCPz_logSurprisalText emmean    SE  df asymp.LCL asymp.UCL
##                          0.000208 -0.753 0.287 Inf     -1.32    -0.189
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast  estimate SE df z.ratio p.value
##  (nothing)   nonEst NA NA      NA      NA
## 
## Note: contrasts are still on the logit scale

Does page-average surprisal predict MW

m.mw.sp <- glmer(MW ~ meanLogSurprisalText  +  (1+meanLogSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.sp)
  MW
Predictors Odds Ratios CI p
(Intercept) 0.25 0.08 – 0.81 0.020
meanLogSurprisalText 1.96 0.61 – 6.24 0.256
Random Effects
σ2 3.29
τ00 pID 1.91
τ11 pID.meanLogSurprisalText 4.98
ρ01 pID -0.93
ICC 0.36
N pID 91
Observations 287
Marginal R2 / Conditional R2 0.010 / 0.367